
#############################################################################
#Figure D.2: Comparison of offtake from three data sources, by month
#This figure compares offtake from three data sources: NIC aggregates provided by the government, transaction data from ePOS records, and survey data from beneficiaries in kilograms
#############################################################################

#colors
Color1 <- "#0537b0"  #darkest blue
Color2 <- "#4160ab"  #dark blue
Color3 <- "#76a5db "  #medium blue
Color4 <- "#97B8DE"  #light blue
Color5 <- "#b7d0ed"  #lightest blue

cols <- colorspace::sequential_hcl(5, "blues 3")

library(ggplot2)
library(magrittr)

####################################################################################################################################
#1) scatterplots, by month, of NIC offtake vs TX data offtake

root          <- "/Users/tbrai/Dropbox/DBT/Data/ePOS RCT Jharkhand/ABBA replication package_20200814"
DataDir       <- paste0(root, "/Data/")
AdminDataDir  <- paste0(root, "/Data/Admin/")
OutputDir     <- paste0(here::here(), "/replication/exhibits/")

  #read in data 
  Dat1 <- read.csv(paste0(AdminDataDir, "adherence_disbursementstock_compare.csv"))

  Dat1_Jul <- subset(Dat1, Dat1$month==7)
  Dat1_Aug<- subset(Dat1, Dat1$month==8)
  Dat1_Sep<- subset(Dat1, Dat1$month==9)
  Dat1_Oct<- subset(Dat1, Dat1$month==10)
  
  monthlist<- c("July","August","September","October")
  
  Dat1$month_str<- NULL
  
  #gen month var with string
  Dat1$month_str[Dat1$month==7]<- "July"
  Dat1$month_str[Dat1$month==8]<- "August"
  Dat1$month_str[Dat1$month==9]<- "September"
  Dat1$month_str[Dat1$month==10]<- "October"
  
  #plot for each month
  for (mon in seq_along(monthlist) ){
    
    print(mon)
    
    tmp.dat<- subset(Dat1, Dat1$month_str==monthlist[mon])
  
    if(mon==1){
      
      Plot<- ggplot(tmp.dat, aes(x=offtake, y=agg_offtake)) + geom_point(color=cols[mon],size=1,shape=16)+ 
        geom_abline(intercept = 0,slope=1,color="black",size=.5)+geom_vline(xintercept = 0,color="black",size=.5)+
        geom_hline(yintercept = 0,color="black",size=.5)+
        ylab("Offtake from NIC aggregates") + xlab("Offtake from transaction data")+ggtitle(monthlist[mon])+
        theme(plot.title = element_text(hjust=.5, vjust=.5,size=15),panel.background = element_blank(),
              panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
              legend.title=element_blank(), legend.position="bottom",
              legend.key.size = unit(1, "cm"), legend.key.width = unit(2, "cm"),
              legend.text = element_text(size = 17, colour = "black"),
              axis.title=element_text(size=12), text = element_text(size=12),
              axis.ticks.length=unit(0.3,"cm"))+ coord_cartesian(xlim=c(0, 180000),ylim=c(0, 180000)) + 
        scale_x_continuous(limits=c(0,175000),breaks = seq(0,175000,45000),expand=c(0,0),label=scales::comma)+ 
        scale_y_continuous(limits=c(0,175000),breaks = seq(0,175000,45000),expand=c(0,0),label=scales::comma)
    }
    
    if(mon!=1){
      
      Plot<- ggplot(tmp.dat, aes(x=offtake, y=agg_offtake)) + geom_point(color=cols[mon],size=1,shape=16)+ 
        geom_abline(intercept = 0,slope=1,color="black",size=.5)+geom_vline(xintercept = 0,color="black",size=.5)+
        geom_hline(yintercept = 0,color="black",size=.5)+
        xlab("Offtake from transaction data")+ggtitle(monthlist[mon])+
        theme(plot.title = element_text(hjust=.5, vjust=.5,size=15),panel.background = element_blank(),axis.title.y=element_blank(),
              panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
              legend.title=element_blank(), legend.position="bottom",
              legend.key.size = unit(1, "cm"), legend.key.width = unit(2, "cm"),
              legend.text = element_text(size = 17, colour = "black"),
              axis.title=element_text(size=12), text = element_text(size=12),
              axis.ticks.length=unit(0.3,"cm"))+ coord_cartesian(xlim=c(0, 180000),ylim=c(0, 180000)) + 
        scale_x_continuous(limits=c(0,175000),breaks = seq(0,175000,45000),expand=c(0,0),label=scales::comma)+ 
        scale_y_continuous(limits=c(0,175000),breaks = seq(0,175000,45000),expand=c(0,0),label=scales::comma)
    }
    
      
    assign(paste0("NIC",monthlist[mon]), Plot)

    rm(tmp.dat,Plot)
  }
  
  
  
#2) scatterplots, by month, of survey offtake vs TX data offtake
  #read in data 
  Dat2<- readstata13::read.dta13(paste0(DataDir, "adherence_offtake_compare.dta"))
  
  monthlist.dat<-c("_jul17","_aug17","_sep17","_oct17")
  monthlist<-c("July","August","September","October")
  
  Dat2$month_str<- NULL
  Dat2$month_str[Dat2$month==7]<- "July"
  Dat2$month_str[Dat2$month==8]<- "August"
  Dat2$month_str[Dat2$month==9]<- "September"
  Dat2$month_str[Dat2$month==10]<- "October"
  
  #plot for each month
  for (mon in seq_along(monthlist) ){
    
    print(mon)
    
    tmp.dat<- subset(Dat2, Dat2$month_str==monthlist[mon])
    
    if(mon==1){
      
    Plot<- ggplot(tmp.dat, aes(x=quantity_grain, y=liftedquantity_grain)) + geom_point(color=cols[mon],size=1,shape=16)+ 
      geom_abline(intercept = 0,slope=1,color="black",size=.5)+geom_vline(xintercept = 0,color="black",size=.5)+
      geom_hline(yintercept = 0,color="black",size=.5)+
      ylab("Offtake from transaction data") + xlab("Offtake from survey data")+ggtitle(monthlist[mon])+
      theme(plot.title = element_text(hjust=.5, vjust=.5,size=15),panel.background = element_blank(),
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
            legend.title=element_blank(), legend.position="bottom",
            legend.key.size = unit(1, "cm"), legend.key.width = unit(2, "cm"),
            legend.text = element_text(size = 17, colour = "black"),
            axis.title=element_text(size=12), text = element_text(size=12),
            axis.ticks.length=unit(0.3,"cm"))+ coord_cartesian(xlim=c(0, 210),ylim=c(0, 210)) + 
      scale_x_continuous(limits=c(0,200),breaks = seq(0,200,50),expand=c(0,0))+ 
      scale_y_continuous(limits=c(0,200),breaks = seq(0,200,50),expand=c(0,0))
    }
    
    
  
  
  if(mon!=1){
    
    Plot<- ggplot(tmp.dat, aes(x=quantity_grain, y=liftedquantity_grain)) + geom_point(color=cols[mon],size=1,shape=16)+ 
      #geom_smooth(method=lm,se=F, size=1,color=Color2,linetype="dashed",fullrange=T,formula=y~x)+
      geom_abline(intercept = 0,slope=1,color="black",size=.5)+geom_vline(xintercept = 0,color="black",size=.5)+
      geom_hline(yintercept = 0,color="black",size=.5)+
      xlab("Offtake from survey data")+ggtitle(monthlist[mon])+
      theme(plot.title = element_text(hjust=.5, vjust=.5,size=15),panel.background = element_blank(),axis.title.y=element_blank(),
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
            legend.title=element_blank(), legend.position="bottom",
            legend.key.size = unit(1, "cm"), legend.key.width = unit(2, "cm"),
            legend.text = element_text(size = 17, colour = "black"),
            axis.title=element_text(size=12), text = element_text(size=12),
            axis.ticks.length=unit(0.3,"cm"))+ coord_cartesian(xlim=c(0, 210),ylim=c(0, 210)) + 
      scale_x_continuous(limits=c(0,200),breaks = seq(0,200,50),expand=c(0,0))+ 
      scale_y_continuous(limits=c(0,200),breaks = seq(0,200,50),expand=c(0,0))
  }
  
    assign(paste0("Survey",monthlist[mon]), Plot)
    
    rm(tmp.dat,Plot)
  }

####
  # new version
  Dat2 %>% 
    dplyr::filter(month %in% c(7, 8, 9, 10)) %>% 
    ggplot(mapping = aes(x = quantity_grain, y = liftedquantity_grain)) + 
    geom_point(size = 2, shape = 16, color = "#97b8de") + 
    geom_abline(intercept = 0, slope = 1, color = "black", size = .5) + 
    geom_vline(xintercept = 0, color = "black", size = .5) +
    geom_hline(yintercept = 0, color = "black", size = .5) +
    ylab("Offtake from transaction data") + 
    xlab("Offtake from survey data") +
    #ggtitle("Offtake from transaction data vs. Offtake from survey data") +
    theme(plot.title = element_text(hjust = .5, vjust = .5, size = 18), 
          panel.background = element_blank(),
          panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(), 
          panel.border = element_blank(),
          legend.title = element_blank(), 
          legend.position = "bottom",
          legend.key.size = unit(1, "cm"), 
          legend.key.width = unit(2, "cm"),
          legend.text = element_text(size = 20, colour = "black"),
          axis.title = element_text(size = 20), 
          text = element_text(size = 18),
          axis.ticks.length = unit(0.3, "cm")) + 
    coord_cartesian(xlim = c(0, 210), ylim = c(0, 210)) + 
    scale_x_continuous(limits = c(0,200), breaks = seq(0, 200, 50), expand = c(0,0)) + 
    scale_y_continuous(limits = c(0,200), breaks = seq(0, 200, 50), expand = c(0,0))
  ggsave(plot = last_plot(), file = paste(OutputDir, "adherence_scatterplot_offtake_comparison.pdf", sep=""), width=15, height=8.5)
  
  ########### Combine and arrange plots in one figure #################
  
  arrangedplot<- ggpubr::ggarrange(NICJuly, NICAugust, NICSeptember, NICOctober,ncol=4, nrow=1)
  
  arrangedplot2 <- ggpubr::ggarrange(SurveyJuly, SurveyAugust, SurveySeptember, SurveyOctober,ncol = 4, nrow = 1)
  
  #create blank canvas for spacing
  plot.blank<- ggplot()+theme_void()
  arrangedplot_blank<- ggpubr::ggarrange(plot.blank,ncol = 4, nrow = 1)
  
  
  arrangedplot_combined<- ggpubr::ggarrange(plot.blank, arrangedplot,arrangedplot_blank, arrangedplot2,
                                  labels = c(" ","Panel A: Offtake from NIC aggregates vs. Offtake from transaction data ", " ", "Panel B: Offtake from transaction data vs. Offtake from survey data"),
                                  ncol = 1, nrow = 4, heights=c(.2,1,.2,1), hjust=c(0,0,0,0),vjust=c(0,-1,0,-1),font.label = list(face = "bold.italic"))
  
  
  
  ggsave(file=paste(OutputDir, "FigureD_2.pdf", sep=""), width=15, height=8.5)
  
  
